import os
import re
import sys

import openpyxl
import requests

# xing men
#
# Legacy shims: these names let JSON literals (null/true/false) resolve when a
# response body is passed to eval(). They are kept only because other code in
# this file may still parse responses that way; prefer real JSON parsing.
null = ""
true = ''
false = ''

# Listing endpoint (paged news index) and detail endpoint (full article text).
url = "https://xzhouse.com.cn/house/news/getNewsByType.do"
detail_url = 'https://xzhouse.com.cn/house/news/getXwBigText.do'

# Fetch the first listing page to learn how many pages/items exist.
d = {'currPageNo': 1, 'pageSize': 10, 'type': 5, 'xzqh': 320300}
# A timeout keeps the script from hanging forever on a stalled connection.
html = requests.post(url, data=d, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error page.
html.raise_for_status()

# Matches every character outside the basic CJK range U+4E00..U+9FA5.
pattern = re.compile(r'[^\u4e00-\u9fa5]')

# SECURITY: this body used to be passed to eval(), which executes whatever
# the remote server sends. Parse it strictly as JSON instead.
res = html.json()

print(res)
page_total = res['attributes']['pageCount']
counts_total = res['attributes']['totalCount']

# Output workbook file name and the sheet that receives the scraped rows.
book_name_xlsx = '房子销售统计.xlsx'

sheet_name_xlsx = '销售统计'

# Header row (title, content) used when the workbook is first created.
value3 = [["标题", "内容"]]
def write_excel_xlsx(path, sheet_name, value):
    """Create a fresh workbook at *path* and fill one sheet with *value*.

    Args:
        path: Destination file; the workbook is saved here.
        sheet_name: Title given to the workbook's active sheet.
        value: Sequence of rows, each a sequence of cell values. Every value
            is converted with ``str()`` before being written.
    """
    book = openpyxl.Workbook()
    target = book.active
    target.title = sheet_name
    # openpyxl rows and columns are 1-based, hence start=1.
    for row_no, row_values in enumerate(value, start=1):
        for col_no, item in enumerate(row_values, start=1):
            target.cell(row=row_no, column=col_no, value=str(item))
    book.save(path)
    print("xlsx格式表格写入数据成功！")


def read_excel_xlsx(path, sheet_name):
    """Print every cell of sheet *sheet_name* in the workbook at *path*.

    Each row is printed on its own line; every cell value is followed by a
    space and a tab.
    """
    # Index the workbook by sheet name; wb.get_sheet_by_name(sheet_name) is
    # deprecated and should not be used.
    target = openpyxl.load_workbook(path)[sheet_name]
    for cells in target.rows:
        rendered = "".join(str(c.value) + " \t" for c in cells)
        print(rendered)


def zhui_jia_excel_xlsx(path, sheet_name, value):
    """Append *value* as one new row to an existing workbook sheet.

    ("zhui jia" is pinyin for "append".) The workbook at *path* is loaded,
    the row is added after the last used row of *sheet_name*, and the file
    is saved back to the same path.
    """
    book = openpyxl.load_workbook(path)
    book[sheet_name].append(value)
    book.save(path)

# First listing page to process. The original loop skipped every page below
# 19 (presumably to resume an interrupted run); set to 1 for a full crawl.
START_PAGE = 19

# Seconds to wait for each HTTP response before giving up.
REQUEST_TIMEOUT = 30

# Compiled once here instead of on every article inside the loop.
_TAG_RE = re.compile(r'<[^>]+>', re.S)
_BRACED_RE = re.compile(r'\{.*?\}')
_NOISE_RE = re.compile(r'[a-zA-Z@.-]')


def fetch_json(endpoint, payload):
    """POST *payload* as form data to *endpoint* and return the decoded JSON.

    SECURITY: response bodies used to be passed to eval(), which executes
    whatever the remote server sends. They are now parsed strictly as JSON.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the body is not valid JSON.
    """
    response = requests.post(endpoint, data=payload, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def clean_detail_text(raw_html):
    """Reduce an article's HTML body to plain text.

    Steps, in order: strip HTML tags, drop CR/LF characters, remove
    ``{...}`` spans together with their content, then remove ASCII letters
    and the characters ``@``, ``.`` and ``-``.

    Args:
        raw_html: Article markup; ``None`` or an empty string yields ``''``.

    Returns:
        The cleaned text.
    """
    if not raw_html:
        return ''
    text = _TAG_RE.sub('', raw_html)
    text = text.replace('\n', '').replace('\r', '')
    text = _BRACED_RE.sub('', text)
    # NOTE: this also strips '.' and '-' from numbers (e.g. "3.5" -> "35"),
    # exactly as the original expression did.
    return _NOISE_RE.sub('', text)


# Run the crawl only when executed as a script, so importing this module to
# reuse the helpers above does not start it.
if __name__ == "__main__":
    # The append helper needs an existing workbook; create it with the header
    # row on the first run instead of failing with FileNotFoundError.
    if not os.path.exists(book_name_xlsx):
        write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, value3)

    # Page numbers are 1-based and pageCount is presumably the number of
    # pages, so the upper bound must be inclusive (the original range()
    # stopped one page early).
    last_page = int(page_total)
    for page_no in range(START_PAGE, last_page + 1):
        listing = fetch_json(
            url,
            {'currPageNo': page_no, 'pageSize': 10, 'type': 5, 'xzqh': 320300},
        )
        # A JSON null for 'obj' is treated as "no items on this page".
        for item in listing.get('obj') or []:
            detail = fetch_json(detail_url, {'id': item['nrid']})
            article = detail['obj']
            title = article['mc'] or ''
            body = clean_detail_text(article['content'])
            print(title)
            print(body)
            zhui_jia_excel_xlsx(book_name_xlsx, sheet_name_xlsx, [title, body])
            print("===============================================================================================================================================")

